import os

import openai

# Take the API key from the OPENAI_API_KEY environment variable so a real secret
# never has to be written into this file. The original placeholder is kept as the
# fallback, so behavior is unchanged when the variable is not set.
openai.api_key = os.environ.get("OPENAI_API_KEY", "your-api-key-here")

# Catalogue of known spatial-reasoning question types.
#
# Each key is a question-type name and each value is a dict with two entries:
#   "question": a template question for that type, using placeholder object
#               names (A, B, C, ...).
#   "steps":    the generic, numbered solution steps for that type.
#
# generate_prompt() embeds this catalogue in the prompt sent to the model, so
# every string here (including the whitespace inside the triple-quoted "steps"
# blocks) is runtime prompt content, not just documentation.
question_type_coll = {
    # Which of several candidate objects is nearest to a reference object.
    "relative_distance": {
        "question": "Measuring from the closest point of each object, which of these objects (A, B, C, D) is the closest to the E?",
        "steps": '''
            Step1: Identify reference and target objects in the question.
            Step2: Estimate spatial positions for these objects.
            Step3: Calculate the Euclidean distance between each target object and the reference object.
            Step4: Sort the calculated distances and choose the target with the smallest distance as the answer.
            '''
    },
    # Number of instances of one object category in the room.
    "object_count": {
        "question": "How many instances of object A are there in this room?",
        "steps": '''
            Step1: Identify the target object category mentioned in the question.
            Step2: Locate and count all instances of this category within the given space.
            Step3: Return the total count as the answer.
            '''
    },
    # Order in which object categories first become visible in the video.
    "appearance_order": {
        "question": "What will be the first-time appearance order of the following categories in the video: A, B, C, D?",
        "steps": '''
            Step1: Identify all mentioned object categories.
            Step2: Analyze the video to determine the timestamp when each category first becomes visible.
            Step3: Sort the objects by their first appearance time.
            Step4: Return the sorted list as the answer.
            '''
    },
    # Direction of a target object relative to an observer's position and heading.
    # NOTE(review): the template offers "left, right, or back" while Step4 also
    # lists "front" — confirm whether the two are meant to agree.
    "relative_direction": {
        "question": "If I am standing by object A and facing object B, is object C to my left, right, or back?",
        "steps": '''
            Step1: Identify the reference object, facing direction, and target object from the question.
            Step2: Establish the local coordinate frame based on the position and facing direction.
            Step3: Determine the relative position of the target object in this coordinate frame.
            Step4: Classify this relative position as left, right, front, or back, and return it as the answer.
            '''
    },
    # Longest dimension of a single object, in centimeters.
    "object_size": {
        "question": "What is the length of the longest dimension (length, width, or height) of object A in centimeters?",
        "steps": '''
            Step1: Identify the target object mentioned in the question.
            Step2: Retrieve or estimate the object's dimensions (length, width, height).
            Step3: Compare the dimensions to find the largest value.
            Step4: Return this maximum value as the answer.
            '''
    },
    # Closest-point distance between two objects, in meters.
    "absolute_distance": {
        "question": "Measuring from the closest point of each object, what is the distance between object A and object B in meters?",
        "steps": '''
            Step1: Identify the two objects involved in the distance measurement.
            Step2: Estimate or retrieve the spatial positions of both objects.
            Step3: Compute the shortest Euclidean distance between any two points on the two objects.
            Step4: Return this distance as the answer.
            '''
    },
    # Floor area of the room (or combined rooms), in square meters.
    "room_size": {
        "question": "What is the size of this room (in square meters)? If multiple rooms are shown, estimate the size of the combined space.",
        "steps": '''
            Step1: Identify the boundaries of the room or rooms shown.
            Step2: Estimate the dimensions (length and width) of each room.
            Step3: Calculate the area of each room and sum them if multiple rooms are present.
            Step4: Return the total area as the answer.
            '''
    },
    # Fill in the missing actions of a partially specified navigation route.
    "route_plan": {
        "question": "You are a robot beginning at object A and facing object B. Navigate to object C. Fill in this route: 1. Go forward until object B 2. [?] 3. Go forward until object D 4. [?] 5. Go forward until object C.",
        "steps": '''
            Step1: Identify the starting point, initial facing direction, and destination.
            Step2: Analyze the spatial layout to determine intermediate obstacles and navigable paths.
            Step3: For each segment, decide the required movement (e.g., turn left, turn right) based on the current position and direction.
            Step4: Fill in the missing steps with the appropriate navigational actions to complete the route.
            '''
    }
}

def generate_prompt(question, question_types=None):
    """Build the classification prompt for a spatial-reasoning question.

    The prompt asks the model to match ``question`` against the known
    question-type templates and return the matching solution steps, or to
    write new steps in the same format when nothing matches.

    The known types are rendered as plain text (type name, template, one
    step per line) rather than as a Python ``dict`` repr, so the model does
    not see braces, quotes, literal ``\\n`` escapes, or source indentation.

    Args:
        question: The user question to classify; inserted verbatim between
            double quotes.
        question_types: Optional mapping of type name to a dict with
            ``"question"`` (template) and ``"steps"`` (multi-line string)
            entries. Defaults to the module-level ``question_type_coll``.

    Returns:
        The complete prompt as a single string without leading or trailing
        whitespace.
    """
    if question_types is None:
        # Resolved at call time so the module-level catalogue stays the default.
        question_types = question_type_coll

    type_sections = []
    for type_name, spec in question_types.items():
        # The step blocks are indented triple-quoted literals; keep only the
        # non-blank lines and drop the source indentation around them.
        step_lines = [
            line.strip() for line in spec["steps"].splitlines() if line.strip()
        ]
        section = [
            f"Type: {type_name}",
            f"Template: {spec['question']}",
            "Steps:",
        ]
        section.extend(f"  {line}" for line in step_lines)
        type_sections.append("\n".join(section))
    known_types = "\n\n".join(type_sections)

    prompt_lines = [
        "You are an expert in spatial reasoning and question type classification.",
        "",
        "Given the following question:",
        f'"{question}"',
        "",
        "You must do the following:",
        "1. Determine whether this question belongs to one of the known types listed below by comparing it to their templates.",
        "2. If it matches one of the types, return the solution steps for that type.",
        "3. If it does not match any, generate new solution steps following the same format: numbered steps, clear reasoning flow, and generic logic without referencing specific object names.",
        "",
        "Known question types with templates:",
        known_types,
        "",
        "Return only the solution steps in the following format:",
        "Step1: ...",
        "Step2: ...",
        "Step3: ...",
        "...",
        "",
        "Do not include any explanations or justification.",
    ]
    return "\n".join(prompt_lines)

def get_solution_steps(question):
    """Ask the chat model for the solution steps of a spatial-reasoning question.

    Sends the prompt built by ``generate_prompt(question)`` as a single user
    message to the ``gpt-4o`` model with ``temperature=0.3``.

    Works with both generations of the ``openai`` package: the 1.x
    module-level client (``openai.chat.completions.create``) is used when it
    is available, otherwise the legacy ``openai.ChatCompletion.create`` call.

    Args:
        question: The question text to classify and solve.

    Returns:
        The model's reply with surrounding whitespace stripped, or an empty
        string if the reply carries no text content.

    Raises:
        Whatever the ``openai`` client raises for authentication, network,
        or API errors; these are not caught here.
    """
    request = {
        "model": "gpt-4o",
        "messages": [
            {"role": "user", "content": generate_prompt(question)}
        ],
        "temperature": 0.3,
    }

    if hasattr(openai, "chat"):
        # openai>=1.0: ChatCompletion was removed in favour of chat.completions.
        response = openai.chat.completions.create(**request)
    else:
        # openai<1.0: legacy resource-style API.
        response = openai.ChatCompletion.create(**request)

    # Attribute access is supported by both the legacy response objects and
    # the 1.x response models, unlike subscripting.
    content = response.choices[0].message.content
    # The content may be None (e.g. when the model returns no text); avoid
    # calling .strip() on None.
    return (content or "").strip()

if __name__ == "__main__":
    # Demo run: request solution steps for one sample question and print them.
    demo_question = "How far is the desk from the chair?"
    print("Solution Steps:\n", get_solution_steps(demo_question))
